import matplotlib.pyplot as plt
import numpy as np
import cv2, pickle, glob
bins_array = [[8,8,8], [16,16,16], [32,32,32], [64,64,64]]
def descriptor(image):
# compute a 3D histogram in the RGB colorspace,
# then normalize the histogram so that images
hist = cv2.calcHist([image], [0, 1, 2],
None, bins,
[0, 256, 0, 256, 0, 256])
# return out 3D histogram as a flattened array
return hist.flatten()
def indexor(save_index):
index = {}
# use glob to grab the image paths and loop over them
for imagePath in glob.glob(database + "/*.*"):
k = imagePath[imagePath.rfind("/") + 1:]
# load the image, describe it using our RGB histogram
# descriptor, and update the index
image = cv2.imread(imagePath)
features = descriptor(image)
index[k] = features
if save_index:
with open('index', 'wb') as f:
f.write(pickle.dumps(index))
return index
bins = bins_array[0]
database = 'image/'
# index the database
index = indexor(False)
def _display(cv_image, title='Image'):
"""Convert OpenCV images from BGR to RGB for Pyplot"""
plt_image = cv2.cvtColor(cv_image, cv2.COLOR_BGR2RGB)
plt.title(title)
plt.imshow(plt_image)
plt.show()
return True
def _chi2_distance(histA, histB, eps=1e-10):
d = 0.5 * np.sum([((a - b) ** 2) / (a + b + eps)
for (a, b) in zip(histA, histB)])
return d
def query(query, rank, precision_recall=False, relevant=None):
"""Performs an image query in the CBIR database"""
print("Reading and analysing the query image...")
query_image= cv2.imread(query)
_display(cv_image=query_image, title='Query')
#3D RGB histogram with 8 bins per channel
query_features = descriptor(query_image)
print("Wait while it's searching...")
results = search(query_features)
print("Hmm so the results are...")
for j in range(rank):
(score, imageName) = results[j]
path = database + "/%s" % (imageName)
result = cv2.imread(path)
_display(result)
if precision_recall == True and relevant is not None:
result_images = [i[1] for i in results]
relevant_ranks = sorted([result_images.index(x)+1 for x in relevant])
num_relevant = range(len(relevant))
precision = np.divide(num_relevant, relevant_ranks)
recall = np.divide(num_relevant, len(relevant))
# plot precision-recall curve
plt.plot(recall, precision, 'r.-')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Graph for Bins : {}'.format(bins))
plt.axis([0, 1, 0, 1.05])
plt.show()
return None
def search(query_features):
results = {}
for (k, features) in index.items():
d = _chi2_distance(features, query_features)
# how 'similar' the image in the index is to our query
results[k] = d
# sort our results, so that the smaller distances
results = sorted([(v, k) for (k, v) in results.items()])
return results
query('image/red.jpg', 10)
With Variable Histogram Bin Sizes
Precision-Recall graphs measure the accuracy of our image retrieval system. They are also used in the performance measurement of any other search engine.
Precision
Precision is the total relevant images retrieved from a collection of relevant and irrelevant images. It denotes how precise the system is in accurately retreiving useful results from an impure or noisy collection of items.
precision = relevant_items_retrieved / (relevant_items + irrelevant_items)
Recall
Recall is the ratio of the relevant images retrieved to all the relevant images in the database. It denotes the how well the system recalls the relevant images from the all the existing relevant images.
recall = relevant_items_retrieved / total_relevant_items
CBIR engine with Bin Size : 8
query(query='image/red.jpg', rank=10,
precision_recall=True,
relevant=['red.jpg', '39.jpg', '38.jpg', '40.jpg', '44.jpg', '30.jpg'])
CBIR engine with Bin Size : 16
bins = bins_array[1]
query(query='image/red.jpg', rank=10,
precision_recall=True,
relevant=['red.jpg', '39.jpg', '38.jpg', '40.jpg', '44.jpg', '30.jpg'])
CBIR engine with Bin Size : 32
bins = bins_array[2]
query(query='image/red.jpg', rank=10,
precision_recall=True,
relevant=['red.jpg', '39.jpg', '38.jpg', '40.jpg', '44.jpg', '30.jpg'])
CBIR engine with Bin Size : 64
bins = bins_array[3]
query(query='image/red.jpg', rank=10,
precision_recall=True,
relevant=['red.jpg', '39.jpg', '38.jpg', '40.jpg', '44.jpg', '30.jpg'])